home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Software Vault: The Gold Collection
/
Software Vault - The Gold Collection (American Databankers) (1993).ISO
/
cdr47
/
lz13.zip
/
LZCOMP3.ASM
< prev
next >
Wrap
Assembly Source File
|
1993-05-01
|
8KB
|
318 lines
title lzcomp - file compressor using Lempel-Ziv algorithm
;Tom Pfau
;Digital Equipment Corporation
;Parsippany, NJ
;
;v1.2, Toad Hall Tweak
; - Converting to COM file.
; - Gonna use BP to hold bit_offset throughout (faster)
; - Moved most buffers outside code space.
; - Now accepts input file on command line, otherwise inputs
; from StdIn
; - Always outputs to StdOut
; NOTE: reading from StdIn doesn't seem to work (cause unknown),
; so it won't work as a filter either (which I had hoped for).
; It WILL take a command-line file name and redirect it appropriately.
;
;David Kirschbaum
;Toad Hall
;kirsch@braggvax.ARPA
;Constants
;Code values 0..255 stand for the single input bytes themselves;
;the values below are the reserved codes and the table/buffer limits.
CLEAR		equ	100H		;256: clear code, written before every
					;table (re)initialisation
EOF		equ	CLEAR+1		;257: written after the last data code
FIRST_FREE	equ	EOF+1		;258: first code handed out by add_code
MAXMAX		equ	1000H		;4096: max code + 1 (table is full here)
BUFFSIZE	equ	1000H		;4096: size in bytes of each of the
					;read and write buffers
include macros2.mlb
;Hash table entry
;One 5-byte record per code (index multiplies the code by 5 to reach
;its record). A link value of -1 means "no entry" (see init_table,
;lookup_code and add_code).
hash_rec struc
first dw ? ; Offset 0: first code that extends this code by one char, or -1
next dw ? ; Offset 2: next code sharing the same prefix (chain link), or -1
char db ? ; Offset 4: suffix char this code adds to its prefix
hash_rec ends
;Declare Segments
;Single segment with code origin 100H (the header notes this was
;converted to a COM file); CS, DS and ES are all assumed to address it.
Code segment para public 'code'
assume CS:Code, DS:Code, ES:Code
org 100H
;Entry point named on the END directive: jumps over the variables
;below to the real startup code in start.
LzComp proc near
jmp Start
;TH collecting all data segment stuff
input_handle dw 0 ;handle read_char reads from; 0 = StdIn unless start opens a file
prefix_code dw 0 ;code of the string matched so far (see compress)
free_code dw 0 ;next unused code; set to FIRST_FREE by init_table, bumped by add_code
max_code dw 0 ;code value at which nbits must grow; 512 after init_table, doubled in compress
nbits dw 0 ;current width in bits of each code written; 9 after init_table, up to 12
k db 0 ;most recent input char, saved by compress across calls
;bit_offset dw 0 ;no longer a variable: the output bit offset is kept in BP
input_offset dw 0 ;index into input_data of the next unread byte
input_size dw 0 ;number of bytes placed in input_data by the last read
LzComp endp
start proc near
;Program startup.
;Picks the input source from the PSP command tail (length byte at 80H,
;text from 81H), runs compress, and terminates.
; - First blank-delimited word of the tail is taken as the input file
;   name and opened read-only; its handle goes in input_handle.
; - An empty tail, or one holding only blanks/tabs (as is typically
;   left behind when the command used only < > redirection), leaves
;   input_handle at its default of 0 (StdIn).
;Exit code: 0 after a completed compress; otherwise whatever is in AL
;when Terminate is reached (the DOS error code if the open fails).
;
;TH we won't mess with any memory allocating, since I think
;all required buffers, hash tables, etc. can work within
;a lousy 64Kb segment.
;Won't even bother moving the stackpointer for now either.
	cld				;lodsb/stosw/stosb everywhere in this
					;program assume forward direction
	mov	si,80H			;point to PSP cmd tail length byte
	lodsb				;AL = tail length, SI -> 81H
	xor	ah,ah			;clear msb
	mov	cx,ax			;CX = chars left in tail
	jcxz	Start_Process		;empty tail, use StdIn
Skip_Blank:
	lodsb				;next tail char
	cmp	al,' '
	je	Skip_Next
	cmp	al,9			;tab counts as a blank too
	jne	Got_Name		;first char of the file name
Skip_Next:
	loop	Skip_Blank
	jmp	short Start_Process	;nothing but blanks, use StdIn
Got_Name:
	mov	dx,si
	dec	dx			;DX -> first char of file name
Find_End:
	dec	cx			;one more tail char consumed
	jz	End_Name		;tail exhausted, SI -> byte after it
	lodsb
	cmp	al,' '
	je	Back_Up
	cmp	al,9
	jne	Find_End
Back_Up:
	dec	si			;SI -> blank that ends the name
End_Name:
	mov	byte ptr [si],0		;make file name AsciiZ
	mov	ax,3D00H		;open file/device, read-only
	int	21H
	jb	Terminate		;failed somehow, die (AL = error code)
	mov	input_handle,ax		;save input handle
Start_Process:
	call	compress		;Compress file
	xor	al,al			;finished normally: errorlevel 0
Terminate:
	mov	ah,4CH			;terminate, errorlevel in AL
	int	21H
start endp
compress proc near
;Compress the whole input stream (read_char) into variable-width codes
;(write_code), then flush what is left in the output buffer.
;The stream starts with CLEAR, ends with EOF, and a further CLEAR is
;written each time the table fills at 12 bits and is reinitialised.
;Uses:	BP as the running bit offset into output_data,
;	prefix_code, k, nbits, max_code.
;Destroys AX, BX, CX, DX, SI, DI, BP.
	xor	bp,bp		;bit offset starts at 0. BP is set nowhere
				;else before the first write_code, and its
				;value at program entry is not guaranteed.
l1:	call	init_table	;Initialize the table and some vars
	mov	ax,CLEAR	;Write a clear code
	call	write_code
	call	read_char	;Read first char
	;NOTE(review): carry from this first read is not checked. On
	;empty input AX is presumably 0 here, so the output would be
	;CLEAR, code 0, EOF - confirm what the decompressor expects
	;before changing this.
l4:	xor	ah,ah		;Turn char into code
l4a:	mov	prefix_code,ax	;Set prefix code
	call	read_char	;Read next char
	jc	l17		;Carry means EOF
	mov	k,al		;Save char in k
	mov	bx,prefix_code	;Get prefix code
	call	lookup_code	;See if this pair in table
	jnc	l4a		;nc means yes, new code in ax
	call	add_code	;Add pair to table, BX = new code
	push	bx		;Save new code (flush may destroy BX)
	mov	ax,prefix_code	;Write old prefix code
	call	write_code
	pop	bx
	mov	al,k		;Last char becomes the new prefix
	cmp	bx,max_code	;Exceed code size?
	jl	l4		;less means no
	cmp	nbits,12	;Currently less than 12 bits?
	jl	l14		;yes, just widen the codes
	mov	ax,CLEAR	;Table full: write a clear code
	call	write_code
	call	init_table	;Reinit table
	mov	al,k		;get last char (AX was destroyed)
	jmp	l4		;Start over
l14:	inc	nbits		;Increase number of bits
	shl	max_code,1	;Double max code size
	jmp	l4		;Get next char
l17:	mov	ax,prefix_code	;Write last code
	call	write_code
	mov	ax,EOF		;Write EOF code
	call	write_code
	mov	ax,bp		;bit_offset: anything still buffered?
	or	ax,ax		;TH
	je	l18		;no, nothing to flush
	mov	cx,8		;convert bits to bytes
	xor	dx,dx
	div	cx
	or	dx,dx		;If extra bits, make sure they get
	je	l17a		;written: round byte count up
	inc	ax
l17a:	call	flush		;write AX bytes
l18:	ret
compress endp
init_table proc near
;Reset the string table and the code-width state.
;Fills the records of codes 0..255 with -1 words, then restarts code
;allocation at FIRST_FREE with 9-bit codes.
;Out:	AX = -1, CX = 0, DI -> just past the filled area.
;Relies on ES addressing the table and on forward string direction.
	mov	di,offset hash		;TH table base
	mov	cx,640			;256 records * 5 bytes = 640 words
	mov	ax,-1			;"unused" marker
	rep	stosw			;wipe the single-char records
	mov	free_code,FIRST_FREE	;next code to hand out
	mov	max_code,512		;9-bit codes run out at 512
	mov	nbits,9			;start with 9-bit codes
	ret
init_table endp
write_code proc near
;Append one code to the output bit stream.
;In:	AX = code, nbits = its width, BP = bit offset in output_data.
;Out:	BP advanced by nbits (rebased to the start of the buffer when
;	the buffer had to be flushed first).
;Three bytes are always stored at the target position: the code,
;shifted up by the bit position and merged with the bits already in
;the first byte.
;Destroys AX, CX, DX, DI (and BX, SI when a flush happens).
	push	ax			;park the code
	mov	ax,bp			;TH bit_offset lives in BP
	mov	dx,ax
	and	dx,7			;DX = bit position inside the byte
	mov	cl,3
	shr	ax,cl			;AX = byte offset in buffer
	add	bp,nbits		;TH account for this code
	cmp	ax,BUFFSIZE-4		;TH room for 3 more bytes?
	jl	wr_store		;less means yes
	call	flush			;write the AX complete bytes
	mov	si,offset output_data
	add	si,ax			;SI -> partly filled last byte
	mov	al,[si]
	mov	byte ptr output_data,al	;carry it over to the buffer start
	mov	bp,nbits		;TH new bit offset =
	add	bp,dx			;   bits in that byte + this code
	xor	ax,ax			;store at byte offset zero
wr_store:
	add	ax,offset output_data	;AX -> target byte
	mov	di,ax
	pop	ax			;code back in AX
	mov	cx,dx			;shift count = bit position
	xor	dx,dx			;DX collects bits shifted out of AX
	jcxz	wr_put			;byte aligned, nothing to shift/merge
wr_shift:
	shl	ax,1
	rcl	dx,1
	dec	cx
	jnz	wr_shift
	or	al,[di]			;keep bits already in first byte
wr_put:	stosw				;low 16 bits
	mov	al,dl			;overflow bits
	stosb				;third byte
	ret
write_code endp
flush proc near
;Write the first AX bytes of output_data to StdOut.
;In:	AX = number of bytes to write.
;Out:	AX and DX preserved, CX = byte count, BX = 1.
;Does not return on failure: control goes to File_Error with the
;errorlevel in AL, which is
; - the DOS error code when the write call reports an error, or
; - 27H when fewer bytes than requested were written. DOS reports
;   that case (e.g. disk full) without setting carry; 27H is the DOS
;   extended error number for insufficient disk space.
;The two saved words are left on the stack in that case, which is
;harmless because the program terminates.
	push	ax			;Save caller's registers
	push	dx
	mov	dx,offset output_data	;buffer to write
	mov	cx,ax			;AX contains number of bytes to write
	mov	bx,1			;StdOut
	mov	ah,40H			;write to file/device
	int	21H
	jb	fl_fail			;failed somehow, AX = error code
	pop	dx
	pop	cx			;CX = bytes requested
	cmp	ax,cx			;everything written?
	mov	ax,cx			;restore caller's AX (flags kept)
	jne	fl_short		;short write
	ret
fl_short:
	mov	al,27H			;insufficient disk space
fl_fail:
	jmp	File_Error		;terminate, errorlevel in AL
flush endp
read_char proc near
;Fetch the next input byte, refilling input_data from input_handle
;whenever the buffer has been used up.
;Out:	carry clear, AL = byte			- normal case
;	carry set				- nothing more to read
;A failing DOS read does not return: it goes to File_Error.
;Destroys SI, DI (and AX, BX, CX, DX when the buffer is refilled).
	mov	di,input_offset		;index of next unread byte
	cmp	di,input_size
	jge	rc_refill		;buffer used up
rc_fetch:
	mov	si,di
	add	si,offset input_data	;SI -> the byte
	lodsb				;AL = byte
	inc	input_offset		;consume it
	clc				;Success
	ret
rc_refill:
	mov	ah,3FH			;read from file/device
	mov	bx,input_handle		;input handle
	mov	cx,BUFFSIZE		;TH ask for a full buffer
	mov	dx,offset input_data	;buffer to read into
	int	21H
	jb	File_Error		;failed somehow, die
	test	ax,ax			;anything read?
	jz	rc_eof			;nope, we're finished
	mov	input_size,ax		;bytes now in the buffer
	xor	di,di			;TH restart at the first byte
	mov	input_offset,di
	jmp	short rc_fetch
rc_eof:	stc				;Nothing left
	ret
File_Error:
	jmp	Terminate		;File error, terminate
					;errorlevel in AL
read_char endp
lookup_code proc near
;Look for the code that means "prefix BX followed by char AL".
;In:	BX = prefix code, AL = char.
;Out:	carry clear, AX = BX = matching code		- found
;	carry set					- not found, and
;	  DI = 0, SI -> prefix record (its first link is unused), or
;	  DI = 1, SI -> last record on the prefix's chain.
;	SI/DI are what add_code needs to link in the new code.
	call	index			;SI -> record of the prefix
	xor	di,di			;TH flag: prefix has no followers
	cmp	[si].first,-1		;any followers at all?
	je	lk_miss			;no
	inc	di			;flag: chain exists
	mov	bx,[si].first		;head of the chain
	jmp	short lk_probe
lk_advance:
	cmp	[si].next,-1		;end of chain?
	je	lk_miss			;yes, pair is not in the table
	mov	bx,[si].next		;step to next candidate
lk_probe:
	call	index			;SI -> candidate record
	cmp	[si].char,al		;same suffix char?
	jne	lk_advance		;no, keep walking
	mov	ax,bx			;found: hand back its code
	clc
	ret
lk_miss:
	stc				;not found
	ret
lookup_code endp
index proc near
;Convert a code to the address of its table record.
;In:	BX = code.
;Out:	SI = offset hash + BX*5 (records are 5 bytes); BX unchanged.
	mov	si,bx
	add	si,si			;BX * 2
	add	si,si			;BX * 4
	add	si,bx			;BX * 5
	add	si,offset hash		;TH plus hash table base
	ret
index endp
add_code proc near
;Give the pair that lookup_code just failed to find the next free code.
;In:	SI, DI as left by lookup_code, AL = suffix char.
;Out:	BX = code assigned (the old free_code).
;The link in the record at SI is always written. The new record is
;only set up, and free_code only advanced, while BX is not MAXMAX.
;Destroys AX and SI when a record is set up.
;Only called once
	mov	bx,free_code		;code to hand out
	test	di,di			;TH does the prefix have a chain?
	jnz	ad_chain		;yes, append to it
	mov	[si].first,bx		;first follower of this prefix
	jmp	short ad_linked
ad_chain:
	mov	[si].next,bx		;hang new code on end of chain
ad_linked:
	cmp	bx,MAXMAX		;table already full?
	je	ad_done			;yes, just return
	call	index			;SI -> record of the new code
	mov	[si].char,al		;its suffix char
	mov	ax,-1			;TH ok to destroy AX
	mov	[si].next,ax		;no chain successor yet
	mov	[si].first,ax		;no followers yet
	inc	free_code		;TH next code to hand out
ad_done:
	ret
add_code endp
;TH input/output buffers moved here outside code space
;Nothing below reserves any bytes in the assembled file: the three
;names are just addresses that follow the end of the code, laid out as
;  output_data	BUFFSIZE bytes	(filled by write_code, written by flush)
;  input_data	BUFFSIZE bytes	(filled and read by read_char)
;  hash		table of hash_rec records addressed by index
;NOTE(review): index reaches code*5 bytes past hash, so the table
;presumably needs up to MAXMAX*5 bytes - confirm this stays clear of
;the stack at the top of the segment.
even
output_data equ $ ;start of the BUFFSIZE-byte output buffer (was db 1024 dup (?))
input_data equ output_data+BUFFSIZE ;start of the BUFFSIZE-byte input buffer (was db 1024 dup (?))
;Instead of using a separate segment for memory and the hash
;table (with all the resultant segment register fiddling),
;gonna just use a dynamic buffer right in code space.
hash equ input_data+BUFFSIZE ;table base, right after the input buffer
code ends
end LzComp